msr2000 committed on
Commit
1162836
·
verified ·
1 Parent(s): f3602d5

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. LICENSE +21 -0
  2. config.json +67 -0
  3. generation_config.json +9 -0
  4. model-00001-of-000163.safetensors +3 -0
  5. model-00002-of-000163.safetensors +3 -0
  6. model-00003-of-000163.safetensors +3 -0
  7. model-00007-of-000163.safetensors +3 -0
  8. model-00008-of-000163.safetensors +3 -0
  9. model-00009-of-000163.safetensors +3 -0
  10. model-00010-of-000163.safetensors +3 -0
  11. model-00011-of-000163.safetensors +3 -0
  12. model-00012-of-000163.safetensors +3 -0
  13. model-00013-of-000163.safetensors +3 -0
  14. model-00014-of-000163.safetensors +3 -0
  15. model-00015-of-000163.safetensors +3 -0
  16. model-00016-of-000163.safetensors +3 -0
  17. model-00017-of-000163.safetensors +3 -0
  18. model-00018-of-000163.safetensors +3 -0
  19. model-00019-of-000163.safetensors +3 -0
  20. model-00020-of-000163.safetensors +3 -0
  21. model-00021-of-000163.safetensors +3 -0
  22. model-00022-of-000163.safetensors +3 -0
  23. model-00023-of-000163.safetensors +3 -0
  24. model-00024-of-000163.safetensors +3 -0
  25. model-00025-of-000163.safetensors +3 -0
  26. model-00026-of-000163.safetensors +3 -0
  27. model-00027-of-000163.safetensors +3 -0
  28. model-00028-of-000163.safetensors +3 -0
  29. model-00029-of-000163.safetensors +3 -0
  30. model-00030-of-000163.safetensors +3 -0
  31. model-00031-of-000163.safetensors +3 -0
  32. model-00032-of-000163.safetensors +3 -0
  33. model-00033-of-000163.safetensors +3 -0
  34. model-00034-of-000163.safetensors +3 -0
  35. model-00035-of-000163.safetensors +3 -0
  36. model-00036-of-000163.safetensors +3 -0
  37. model-00037-of-000163.safetensors +3 -0
  38. model-00038-of-000163.safetensors +3 -0
  39. model-00039-of-000163.safetensors +3 -0
  40. model-00040-of-000163.safetensors +3 -0
  41. model-00041-of-000163.safetensors +3 -0
  42. model-00042-of-000163.safetensors +3 -0
  43. model-00043-of-000163.safetensors +3 -0
  44. model-00044-of-000163.safetensors +3 -0
  45. model-00045-of-000163.safetensors +3 -0
  46. model-00046-of-000163.safetensors +3 -0
  47. model-00047-of-000163.safetensors +3 -0
  48. model-00048-of-000163.safetensors +3 -0
  49. model-00049-of-000163.safetensors +3 -0
  50. model-00050-of-000163.safetensors +3 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 DeepSeek
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "bos_token_id": 0,
13
+ "eos_token_id": 1,
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 7168,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 18432,
20
+ "kv_lora_rank": 512,
21
+ "max_position_embeddings": 163840,
22
+ "model_type": "deepseek_v3",
23
+ "moe_intermediate_size": 2048,
24
+ "moe_layer_freq": 1,
25
+ "n_group": 8,
26
+ "n_routed_experts": 256,
27
+ "n_shared_experts": 1,
28
+ "norm_topk_prob": true,
29
+ "num_attention_heads": 128,
30
+ "num_experts_per_tok": 8,
31
+ "num_hidden_layers": 61,
32
+ "num_key_value_heads": 128,
33
+ "num_nextn_predict_layers": 1,
34
+ "q_lora_rank": 1536,
35
+ "qk_nope_head_dim": 128,
36
+ "qk_rope_head_dim": 64,
37
+ "quantization_config": {
38
+ "activation_scheme": "dynamic",
39
+ "fmt": "e4m3",
40
+ "quant_method": "fp8",
41
+ "weight_block_size": [
42
+ 128,
43
+ 128
44
+ ]
45
+ },
46
+ "rms_norm_eps": 1e-06,
47
+ "rope_scaling": {
48
+ "beta_fast": 32,
49
+ "beta_slow": 1,
50
+ "factor": 40,
51
+ "mscale": 1.0,
52
+ "mscale_all_dim": 1.0,
53
+ "original_max_position_embeddings": 4096,
54
+ "type": "yarn"
55
+ },
56
+ "rope_theta": 10000,
57
+ "routed_scaling_factor": 2.5,
58
+ "scoring_func": "sigmoid",
59
+ "tie_word_embeddings": false,
60
+ "topk_group": 4,
61
+ "topk_method": "noaux_tc",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.46.3",
64
+ "use_cache": true,
65
+ "v_head_dim": 128,
66
+ "vocab_size": 129280
67
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "do_sample": true,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.46.3"
9
+ }
model-00001-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c7ce6cd03018d584e3f52543d1263aeec16945b071fc8d7bceccd6e658b120a
3
+ size 5234139343
model-00002-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aab7d745df8823be76e3716883f2bba98418bb6bd8502ed18a9380f214b242c
3
+ size 4302383966
model-00003-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c5f2a20cb728fff762106117b25cd1ed32f8aa90a5b9238d00098ae9132657c
3
+ size 4302384375
model-00007-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1722230d7e7798fca43983c41227d6cca0173b8acec83e33194c8ee73276d48
3
+ size 4306080097
model-00008-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aafc33a830ea532a71337062f08966312813af47b093cafec8f7dd110e5bf8d1
3
+ size 4302384356
model-00009-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59909f1e24432bc3977b241a09c9db881ae1d6c7fcbf8cfd08f4301d2c4b91a
3
+ size 4302350190
model-00010-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39bcf9b6e20d6aed8dd789bde6e09d4bd5342f0c747812ec2049824392e4d727
3
+ size 4302383960
model-00011-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a235cd6dc2ee08b6c629018ec9c594b2e7feaf784011c200d18ba6856f35746
3
+ size 4302384375
model-00012-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5b6eceb364c0d54ab23b08ea8c0c93b75e3a95ea11cf3476eb2b989934e1457
3
+ size 1321583941
model-00013-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:232b586cd03bc6d6f1e2fe40d9b6b6ce0981759738f9fb0221fae7db5b1a481b
3
+ size 4302317244
model-00014-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e61be2dbc13bf38b703294f4f3740fecd058d3316d7bda316469292d35af6f84
3
+ size 4302384328
model-00015-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c8d55da023f1995499ba0036fdd213cabef74915517eec4314b746b919b42c6
3
+ size 4302350218
model-00016-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:614dd0a21c2f2b1cd13643c51b7e4adc2ae77ce7b51f454a7c7400c076a9e512
3
+ size 4302383932
model-00017-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d2599d5d80dde7dc83f0eb643b7082ce109f7882978167d508c45723b2cd5d5
3
+ size 4302384377
model-00018-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:269aac4d979ee1ee113b8fc32ec4106abe7411cc37ec92db36fac8320a3d7585
3
+ size 4302350026
model-00019-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d74c211ee145c25ae9e73e9271e0e5ea1df2419fc4ba6fe868b77e98167f205c
3
+ size 4302384124
model-00020-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f373c2e0973027918f7294a492b49cb1a01f06552440573cf5b14319cda70ebb
3
+ size 4302384377
model-00021-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57e606d91653bbc52cc5fb35b6a9fab45360c517a3c287b32d536b9484d7fbb
3
+ size 4302350413
model-00022-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:190506c8fa8f3376a690dee132bcc4e1da04d67dc2dad94d0424ff06defcf028
3
+ size 4302384900
model-00023-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5563f979cfada782649a9aa2d094a11b7d74a1a22830bd411f068cf432cc4b76
3
+ size 4302350808
model-00024-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64d51e099154cc8f2cd547c4f063b9bd68a7c58cf66557fa02bfe3f3d23cb532
3
+ size 4302384504
model-00025-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8dcc877b431fbfdf66d89febd9531caee7fd648309c2d4cf484d8258755fc75
3
+ size 4302384961
model-00026-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44253455b39efc6ef7e8dffddbc4ee9dcfa5e6b1fa7c7ccb5f7ac7d1d1f1e03f
3
+ size 4302350620
model-00027-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e718a563e6137f9a607f328947ec3f17b70bc21c3d0092e0f7ed8c04f8927b3
3
+ size 4302384692
model-00028-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eba4a8a4070f72832a0b04067925054306c2fb3ea2f49b178b5a5c1a0f9ca92c
3
+ size 4302384963
model-00029-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:659c2a5c564379f1e2c04c5dc6d3819ccf41ddff77d300820b25e4d4fa0f288b
3
+ size 4302350448
model-00030-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fee4d9e0063a0c7f4c9061d937469d7eda142be4275e4d55282d7e806d676cb3
3
+ size 4302384884
model-00031-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a86aa089142a6f4f88fb969441196a5a26784c6f6914657995f2146d116ec719
3
+ size 4302350824
model-00032-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94cb70139c1af9712cd8878ae97323bf8eef4a6c74f0cd02b10e4a4053b039ad
3
+ size 4302384488
model-00033-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb193f73287ef47ea6fe50b0e04eb246fbdd23e81672852689510522bbf02907
3
+ size 4302384963
model-00034-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2f876eccfad41bd43dfb3b89f953cfc25a28bc0f7a3de6b0f34441e3665895f
3
+ size 1747417474
model-00035-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec4f764ffb3ec302d0350ac2b317dae8d512bf1948095bfe04dfd4f6588315bc
3
+ size 4302317817
model-00036-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc11d74a8ab383e232b3edf122c2cc646ba773f6c94b4c3672036b28d8925aac
3
+ size 4302384914
model-00037-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c185f09e36d33acf13c1009dd81d67f32d243e0180381970500b447983b4db1
3
+ size 4302350794
model-00038-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a105bccf294a4f0c4bef30a2b6138a2a575f0232c251fb0b8735c9dde59942c
3
+ size 4302384518
model-00039-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2989c230cc48a3efcb0c5ae839359c9caa38d33951e7cccdb5099f1341237bf6
3
+ size 4302384963
model-00040-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fde4799921afaa81ef99d1369c9bbebb70066e27206f36ed9626cbdf797fc64a
3
+ size 4302350602
model-00041-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a21a7cdded35770193b90e8913cb67fa2728640605a0df6198e89539357cd47
3
+ size 4302384710
model-00042-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183088a89351ad86d8664f600153b706d1cd5afe1b25092da37e7e55a016199b
3
+ size 4302384963
model-00043-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23618078d26ac0061b02d761d664b2430540e76873e06039a8b8a84d3e403ebd
3
+ size 4302350432
model-00044-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:296052f606b1955eccfbd75a6f3cd6ded2bfc83e1f2799cb424784246b28c3b6
3
+ size 4302384900
model-00045-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f32c99a35030370b9f33d092643ef1f63bcd35029ccab75e5b80e96d723b6c7f
3
+ size 4302350808
model-00046-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd075d647166129faacc7754b886b7b5b293c96d64d7a2e3e13deedbb78b48a
3
+ size 4302384504
model-00047-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aef09435350347994c09af7a0d55e18d01ed74b840d6bb67b284cf7c57f6424f
3
+ size 4302384961
model-00048-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc715427f502833d15855cf97c5566217285588506620aae8598f7cf7ec8d114
3
+ size 4302350620
model-00049-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5e6f9707f5bdd4706bda2eebb78c5e5bdf5d67814403102391e556906e82b28
3
+ size 4302384692
model-00050-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b76b6df34c223c13c7a4d89ebf081ae372d193e92a3e942acc377d44ffe86b44
3
+ size 4302384963